Scenario B - Baseline Variation

In this scenario, the baseline underlying a spectrum with a fixed number of Gaussian peaks is varied between no baseline, a constant y-offset, and a linear baseline. All datasets contain 3 peaks, and the noise level is kept constant at 1%.

The models used in the inference of the parameters are formulated as follows:

  1. No baseline: \begin{equation} \large y = f(x) = \sum\limits_{m=1}^M \big[A_m \cdot e^{-\frac{(x-\mu_m)^2}{2\cdot\sigma_m^2}}\big] + \epsilon \end{equation}

  2. Constant offset: \begin{equation} \large y = f(x) = \sum\limits_{m=1}^M \big[A_m \cdot e^{-\frac{(x-\mu_m)^2}{2\cdot\sigma_m^2}}\big] + a_0 + \epsilon \end{equation}

  3. Linear baseline: \begin{equation} \large y = f(x) = \sum\limits_{m=1}^M \big[A_m \cdot e^{-\frac{(x-\mu_m)^2}{2\cdot\sigma_m^2}}\big] + a_0 + a_1 \cdot x + \epsilon \end{equation}

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc3 as pm
import arviz as az

#az.style.use('arviz-darkgrid')

# record the PyMC3 version this notebook was executed with (for reproducibility)
print('Running on PyMC3 v{}'.format(pm.__version__))
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Running on PyMC3 v3.8

Import local modules

In [2]:
import os
import sys
sys.path.append('../../modules')
import datagen as dg
import models as mdl
import results as res
import figures as fig
import settings as cnf

Local configuration

In [3]:
# output locations for results and images
out_path      = './output_baseline'
file_basename = out_path + '/scenario_baseline'

# scenario configuration, collected in a single dict so it can be
# saved alongside the results for provenance
conf = {
    # scenario name
    'scenario':    'baseline variation',
    # initialization method for the sampler
    'init_mode':   'adapt_diag',
    # probabilistic model (priors)
    'prior_model': 'lognormal',
    # provide peak positions to the model as test values ('yes'/'no')
    'peak_info':   'yes',
    # model mode ('train'/'eval')
    'model_mode':  'train',
    # data mode ('generate'/'preload')
    'data_mode':   'generate',
    # dataset directory (needed for 'preload' data mode)
    #'dataset_dir': './input_datasets',
    # number of cores to run sampling chains on
    'ncores':      2,
    # number of samples per chain
    'nsamples':    2000,
}
In [4]:
# create the output directory if it does not exist yet;
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs
os.makedirs(out_path, exist_ok=True)

# display the configuration (last expression of the cell)
conf
Out[4]:
{'scenario': 'baseline variation',
 'init_mode': 'adapt_diag',
 'prior_model': 'lognormal',
 'peak_info': 'yes',
 'model_mode': 'train',
 'data_mode': 'generate',
 'ncores': 2,
 'nsamples': 2000}

Save configuration

In [5]:
# persist the configuration dict to the output directory (local settings module)
cnf.save(out_path, conf)

Generate data and plot

In [6]:
# list of wavelengths (x-values)
xval = [i for i in range(200, 400, 2)]

# number of spectra per baseline variation
nsets  = 4

# baseline variation
baselines = ['none', 'offset', 'linear']

# data-baseline label for every dataset, in generation order
# (nsets consecutive entries per baseline type)
lbline = [bl for bl in baselines for i in range(nsets)]

# total number of datasets
tsets = nsets * len(baselines)

ldata  = []
lpeaks = []

if conf['model_mode'] == 'train' and conf['data_mode'] == 'generate':
    # generate the datasets: nsets spectra per baseline type
    for blv in baselines:
        for i in range(nsets):
            df, peaks, _ = dg.data_generator(xvalues=xval, nsamples=15, npeaks=3, tbaseline=blv)
            ldata.append(df)
            lpeaks.append(peaks)
    # save data and peak information to disk for later re-use
    for i in range(len(ldata)):
        ldata[i].to_csv(out_path + '/dataset_%02d.csv' % (i+1), index=False)
    dg.data_save(out_path + '/peakinfo.csv', lpeaks)
else:
    # load pre-generated datasets from disk:
    # 'preload' reads from the configured dataset directory, any other
    # data mode falls back to this scenario's own output directory
    src_dir = conf['dataset_dir'] if conf['data_mode'] == 'preload' else out_path
    ldata, lpeaks, _ = dg.data_load(tsets, src_dir)
In [7]:
# summary of the experimental design for this scenario
print(f"total number of baseline variations     : {len(baselines)}")
print(f"total number of spectra per baseline    : {nsets}")
print(f"total number of datasets per model      : {tsets}")
print(f"total number of inference runs          : {nsets*len(baselines)**2}")
total number of baseline variations     : 3
total number of spectra per baseline    : 4
total number of datasets per model      : 12
total number of inference runs          : 36
In [8]:
# plot all generated datasets in a grid (tsets/2 rows x 2 columns),
# label each panel with its baseline type, and save the figure to disk
fig.plot_datasets(ldata, lpeaks, dims=(int(tsets/2),2), figure_size=(12,int(tsets*(1.8))), 
                            savefig='yes', fname=file_basename, scenario='baseline', labels=lbline)

Initialize models and run inference

In [9]:
# x-values as a float32 numpy array (as expected by the model builder)
x_val = np.array(xval, dtype='float32')

# y-values of every dataset as a numpy array, one list entry per dataset
cols = ldata[0].columns
y_val = [frame[cols].values for frame in ldata]
In [10]:
# initialize models and run inference:
# outer loop builds one probabilistic model per baseline type,
# inner loop runs inference with that model on every generated dataset
models = []
traces = []

# list of tuples with (model baseline, data baseline) combination
lmodbase = []

# actual run number (1-based, used for progress display and trace filenames)
run = 1

# total number of inference runs
truns = nsets * len(baselines)**2

for bl in baselines:
    if conf['model_mode'] == 'train':
        # for each baseline model run inference on all spectra
        print("running: baseline-{0} model".format(bl))
    
    for i in range(len(ldata)):
        if conf['peak_info'] == 'yes':
            # pass the true peak positions (sorted) as test values to aid initialization
            plist = lpeaks[i].flatten()
            plist.sort()
            model_g = mdl.model_pvoigt(xvalues=x_val, observations=y_val[i], npeaks=3, 
                                  mu_peaks=plist, pmodel=conf['prior_model'], baseline=bl)
        else:
            model_g = mdl.model_pvoigt(xvalues=x_val, observations=y_val[i], npeaks=3,
                                                  pmodel=conf['prior_model'], baseline=bl)
                
        models.append(model_g)

        with model_g:
            if conf['model_mode'] == 'train':
                print("({2}/{3}) running inference on dataset #{0}/{1} [{4} model: {5} data]"
                      .format(i+1,len(ldata),run,truns,bl,lbline[i]))
                # draw nsamples per chain on ncores parallel chains
                trace_g = pm.sample(conf['nsamples'], init=conf['init_mode'], cores=conf['ncores'])
                lmodbase += [(bl,lbline[i])]
                traces.append(trace_g)
                # save inference results
                pm.backends.text.dump(out_path + '/traces_%02d' % (run), trace_g)
            else:
                # load traces from disk
                print("loading dataset #{0}/{1}".format(run,truns))
                trace_g = pm.backends.text.load(out_path + '/traces_%02d' % (run))
                traces.append(trace_g)
            run += 1
running: baseline-none model
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(1/36) running inference on dataset #1/12 [none model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 3 divergences: 100%|██████████| 5000/5000 [00:14<00:00, 341.69draws/s]
There were 3 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.8876808800984346, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8866596856377387, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(2/36) running inference on dataset #2/12 [none model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [02:03<00:00, 40.38draws/s]
The acceptance probability does not match the target. It is 0.8806176626092148, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(3/36) running inference on dataset #3/12 [none model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:27<00:00, 179.86draws/s]
The acceptance probability does not match the target. It is 0.9132572539832692, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9555172724041762, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(4/36) running inference on dataset #4/12 [none model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 1 divergences: 100%|██████████| 5000/5000 [00:15<00:00, 329.95draws/s]
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9203805758688152, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.913592137097178, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(5/36) running inference on dataset #5/12 [none model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:19<00:00, 254.37draws/s]
The acceptance probability does not match the target. It is 0.8811464734550873, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8810906037213682, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(6/36) running inference on dataset #6/12 [none model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:32<00:00, 152.67draws/s]
The acceptance probability does not match the target. It is 0.9433641270067629, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(7/36) running inference on dataset #7/12 [none model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:17<00:00, 293.89draws/s]
The acceptance probability does not match the target. It is 0.8826582394419025, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(8/36) running inference on dataset #8/12 [none model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:42<00:00, 117.99draws/s]
The acceptance probability does not match the target. It is 0.9224319880810764, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.2 for some parameters.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(9/36) running inference on dataset #9/12 [none model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 1 divergences: 100%|██████████| 5000/5000 [00:28<00:00, 172.55draws/s]
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.902364324035276, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9569714740136873, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(10/36) running inference on dataset #10/12 [none model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 11 divergences: 100%|██████████| 5000/5000 [00:38<00:00, 129.77draws/s]
There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9227807144707094, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(11/36) running inference on dataset #11/12 [none model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:19<00:00, 260.40draws/s]
The acceptance probability does not match the target. It is 0.9198507245235794, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(12/36) running inference on dataset #12/12 [none model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:15<00:00, 332.61draws/s]
The acceptance probability does not match the target. It is 0.8811731872484613, but should be close to 0.8. Try to increase the number of tuning steps.
running: baseline-offset model
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(13/36) running inference on dataset #1/12 [offset model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:23<00:00, 208.44draws/s]
The acceptance probability does not match the target. It is 0.8799842032401529, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(14/36) running inference on dataset #2/12 [offset model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [02:00<00:00, 41.47draws/s]
The acceptance probability does not match the target. It is 0.9102094601544444, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(15/36) running inference on dataset #3/12 [offset model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:28<00:00, 175.24draws/s]
The acceptance probability does not match the target. It is 0.8946294234768238, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(16/36) running inference on dataset #4/12 [offset model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:24<00:00, 200.81draws/s]
The acceptance probability does not match the target. It is 0.892371662357946, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9036780672243475, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(17/36) running inference on dataset #5/12 [offset model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [03:32<00:00, 23.51draws/s]
The acceptance probability does not match the target. It is 0.9153588454390001, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8974156573293673, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(18/36) running inference on dataset #6/12 [offset model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [10:55<00:00,  7.63draws/s]
The acceptance probability does not match the target. It is 0.8789279850078262, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(19/36) running inference on dataset #7/12 [offset model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:32<00:00, 152.18draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(20/36) running inference on dataset #8/12 [offset model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 7 divergences: 100%|██████████| 5000/5000 [00:17<00:00, 285.01draws/s]
There were 5 divergences after tuning. Increase `target_accept` or reparameterize.
There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(21/36) running inference on dataset #9/12 [offset model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 1,496 divergences: 100%|██████████| 5000/5000 [00:30<00:00, 166.48draws/s]
There were 1496 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.1239378606545137, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(22/36) running inference on dataset #10/12 [offset model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:16<00:00, 304.34draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(23/36) running inference on dataset #11/12 [offset model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:12<00:00, 404.37draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(24/36) running inference on dataset #12/12 [offset model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:11<00:00, 432.97draws/s]
running: baseline-linear model
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(25/36) running inference on dataset #1/12 [linear model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:24<00:00, 206.62draws/s]
The acceptance probability does not match the target. It is 0.8862663233368456, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8818071224801817, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(26/36) running inference on dataset #2/12 [linear model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [01:57<00:00, 42.54draws/s]
The acceptance probability does not match the target. It is 0.8897420663981468, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(27/36) running inference on dataset #3/12 [linear model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:50<00:00, 98.23draws/s] 
The acceptance probability does not match the target. It is 0.9597419257311031, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9435379086944754, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(28/36) running inference on dataset #4/12 [linear model: none spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:25<00:00, 197.73draws/s]
The acceptance probability does not match the target. It is 0.9292878871399108, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9257149860772222, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(29/36) running inference on dataset #5/12 [linear model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [04:12<00:00, 19.83draws/s]
The acceptance probability does not match the target. It is 0.9606316496921812, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9297855507532216, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(30/36) running inference on dataset #6/12 [linear model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [08:22<00:00,  9.94draws/s]
The acceptance probability does not match the target. It is 0.8865287236477241, but should be close to 0.8. Try to increase the number of tuning steps.
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
The number of effective samples is smaller than 10% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(31/36) running inference on dataset #7/12 [linear model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 189 divergences: 100%|██████████| 5000/5000 [00:45<00:00, 109.22draws/s]
There were 64 divergences after tuning. Increase `target_accept` or reparameterize.
There were 125 divergences after tuning. Increase `target_accept` or reparameterize.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(32/36) running inference on dataset #8/12 [linear model: offset spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 46 divergences: 100%|██████████| 5000/5000 [00:31<00:00, 157.76draws/s]
There were 21 divergences after tuning. Increase `target_accept` or reparameterize.
There were 25 divergences after tuning. Increase `target_accept` or reparameterize.
The number of effective samples is smaller than 10% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(33/36) running inference on dataset #9/12 [linear model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 1,593 divergences: 100%|██████████| 5000/5000 [01:29<00:00, 56.10draws/s] 
The acceptance probability does not match the target. It is 0.9945247085810575, but should be close to 0.8. Try to increase the number of tuning steps.
There were 1593 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.09314286416259913, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.4 for some parameters. The sampler did not converge.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(34/36) running inference on dataset #10/12 [linear model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [00:51<00:00, 97.40draws/s] 
The acceptance probability does not match the target. It is 0.9755380555208738, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9783137072476755, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 10% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(35/36) running inference on dataset #11/12 [linear model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [01:07<00:00, 74.26draws/s] 
The acceptance probability does not match the target. It is 0.9726436045313414, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9629308659754747, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
(36/36) running inference on dataset #12/12 [linear model: linear spectrum]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [epsilon, sigma_e, a1, a0, sigma, mu, amp]
Sampling 2 chains, 0 divergences: 100%|██████████| 5000/5000 [02:34<00:00, 32.26draws/s] 
The acceptance probability does not match the target. It is 0.9980757659702013, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.9907782623409604, but should be close to 0.8. Try to increase the number of tuning steps.
The number of effective samples is smaller than 25% for some parameters.

Model visualization

In [11]:
# display the graphical model of the first dataset/model combination
# (last expression of the cell renders the graphviz figure inline)
pm.model_to_graphviz(models[0])
Out[11]:
%3 cluster1 x 3 1 x 3 cluster100 100 cluster15 x 100 15 x 100 amp amp ~ Uniform y y ~ Deterministic amp->y sigma sigma ~ Lognormal sigma->y mu mu ~ Uniform mu->y y_pred y_pred ~ Normal y->y_pred epsilon epsilon ~ HalfNormal epsilon->y_pred sigma_e sigma_e ~ Gamma sigma_e->epsilon
In [16]:
# save one model-graph image per baseline type: the model structure only
# changes every len(lbline) runs, so render the first model of each group
for z, start in enumerate(range(0, len(models), len(lbline))):
    img = pm.model_to_graphviz(models[start])
    img.render(filename=file_basename + '_model_{0}'.format(baselines[z]), format='png');

Collect results and save

In [17]:
# posterior predictive samples (500 draws) for every trace/model pair
# (traces and models are parallel lists of equal length)
ppc = [pm.sample_posterior_predictive(trace, samples=500, model=model)
       for trace, model in zip(traces, models)]
/home/johan/VirtualEnv/ppsda/lib/python3.6/site-packages/pymc3/sampling.py:1247: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample
  "samples parameter is smaller than nchains times ndraws, some draws "
100%|██████████| 500/500 [00:01<00:00, 426.45it/s]
/home/johan/VirtualEnv/ppsda/lib/python3.6/site-packages/pymc3/sampling.py:1247: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample
  "samples parameter is smaller than nchains times ndraws, some draws "
100%|██████████| 500/500 [00:01<00:00, 441.73it/s]
100%|██████████| 500/500 [00:01<00:00, 447.59it/s]
100%|██████████| 500/500 [00:01<00:00, 454.37it/s]
100%|██████████| 500/500 [00:01<00:00, 457.06it/s]
100%|██████████| 500/500 [00:01<00:00, 444.72it/s]
100%|██████████| 500/500 [00:01<00:00, 451.74it/s]
100%|██████████| 500/500 [00:01<00:00, 455.67it/s]
100%|██████████| 500/500 [00:01<00:00, 446.21it/s]
100%|██████████| 500/500 [00:01<00:00, 453.87it/s]
100%|██████████| 500/500 [00:01<00:00, 442.53it/s]
100%|██████████| 500/500 [00:01<00:00, 426.30it/s]
100%|██████████| 500/500 [00:01<00:00, 402.17it/s]
100%|██████████| 500/500 [00:01<00:00, 405.96it/s]
100%|██████████| 500/500 [00:01<00:00, 409.27it/s]
100%|██████████| 500/500 [00:01<00:00, 422.22it/s]
100%|██████████| 500/500 [00:01<00:00, 424.12it/s]
100%|██████████| 500/500 [00:01<00:00, 428.44it/s]
100%|██████████| 500/500 [00:01<00:00, 428.18it/s]
100%|██████████| 500/500 [00:01<00:00, 423.52it/s]
100%|██████████| 500/500 [00:01<00:00, 423.06it/s]
100%|██████████| 500/500 [00:01<00:00, 418.21it/s]
100%|██████████| 500/500 [00:01<00:00, 421.21it/s]
100%|██████████| 500/500 [00:01<00:00, 420.22it/s]
100%|██████████| 500/500 [00:01<00:00, 392.85it/s]
100%|██████████| 500/500 [00:01<00:00, 395.26it/s]
100%|██████████| 500/500 [00:01<00:00, 375.22it/s]
100%|██████████| 500/500 [00:01<00:00, 378.96it/s]
100%|██████████| 500/500 [00:01<00:00, 353.89it/s]
100%|██████████| 500/500 [00:01<00:00, 397.76it/s]
100%|██████████| 500/500 [00:01<00:00, 399.65it/s]
100%|██████████| 500/500 [00:01<00:00, 396.65it/s]
100%|██████████| 500/500 [00:01<00:00, 396.02it/s]
100%|██████████| 500/500 [00:01<00:00, 391.72it/s]
100%|██████████| 500/500 [00:01<00:00, 397.85it/s]
100%|██████████| 500/500 [00:01<00:00, 396.12it/s]
In [27]:
# various plots to inspect the inference results
# (exploratory cell: uncomment individual plot calls as needed)
varnames = mdl.get_varnames(traces[20])

#az.plot_trace(traces[2], varnames, compact=True);
#az.plot_trace(traces[2], varnames, divergences='top');
#az.plot_autocorr(traces[0], varnames);
#az.plot_posterior(traces[20], varnames);

#for idx, trace in enumerate(traces):
#    az.plot_forest(trace, var_names = varnames, r_hat=True, ess=True);

#az.summary(traces[20], varnames)
In [19]:
if conf['model_mode'] == 'train':
    # Summarise the freshly sampled traces (r_hat, ess, waic, ...) for every
    # model/data combination.
    # NOTE(review): epsilon_real=0.05 here, while the intro text states a 1%
    # noise level — confirm which value is intended.
    df = res.get_results_summary(traces, ppc, y_val, epsilon_real=0.05, sets=tsets, 
                                 labels=lmodbase, multimodels='yes')
else:
    # Evaluation mode: reload a previously saved summary table from disk and
    # restore the 1-based row index used in the rendered output.
    df = pd.read_csv(file_basename + '.csv')
    df.index += 1

    # Rebuild the (model, data) combination labels from the stored columns.
    lmodbase = list(zip(df['model'].to_list(), df['data'].to_list()))
df
/home/johan/VirtualEnv/ppsda/lib/python3.6/site-packages/arviz/stats/stats.py:1126: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail. 
See http://arxiv.org/abs/1507.04544 for details
  "For one or more samples the posterior variance of the log predictive "
Out[19]:
r_hat mcse ess bfmi r2 waic epsilon epsilon_real model data
1 1.000000 0.000000 5510.600000 1.127605 0.999329 -2074.246111 0.120168 0.05 none none
2 1.006000 0.003300 1969.800000 1.090027 0.999831 -3819.522704 0.067510 0.05 none none
3 1.000000 0.000000 3697.800000 1.009928 0.999906 -4639.782160 0.051408 0.05 none none
4 1.000000 0.000000 4459.100000 1.062722 0.999925 -4765.128387 0.049294 0.05 none none
5 1.000000 0.028700 3269.200000 1.003770 0.346038 9781.795314 6.284553 0.05 none offset
6 1.401000 17.335600 1575.500000 1.071279 0.577767 9250.895704 5.265536 0.05 none offset
7 1.000000 0.042200 3913.000000 0.995950 0.223397 9602.140417 5.917212 0.05 none offset
8 1.225000 7.920800 704.200000 0.068170 0.140662 8763.857386 4.420850 0.05 none offset
9 1.000000 0.020100 2353.000000 0.975486 0.657125 9940.282678 6.627973 0.05 none linear
10 1.000000 0.035400 3065.000000 0.969736 0.577749 9824.604464 6.378064 0.05 none linear
11 1.668000 5.138700 6.900000 1.003023 0.307765 9591.030016 5.708051 0.05 none linear
12 1.000000 0.040700 4783.500000 1.034751 0.474939 9759.942551 6.236511 0.05 none linear
13 1.000000 0.000000 3949.160000 0.852258 0.999326 -2071.713272 0.119808 0.05 offset none
14 1.000000 0.001360 2738.200000 0.871033 0.999831 -3820.674663 0.067275 0.05 offset none
15 1.000000 0.000000 4049.960000 0.869515 0.999905 -4629.643992 0.051425 0.05 offset none
16 1.000000 0.000000 4638.920000 0.850010 0.999925 -4756.554167 0.049310 0.05 offset none
17 1.199200 0.217880 5592.800000 1.015800 0.999956 -4686.480673 0.050327 0.05 offset offset
18 1.000000 0.004400 3326.760000 1.018804 0.999962 -4727.255072 0.049682 0.05 offset offset
19 1.000000 0.000240 5980.040000 1.021819 0.999941 -4663.150999 0.050723 0.05 offset offset
20 1.000000 0.000000 4484.520000 1.050378 0.999896 -4772.181533 0.048940 0.05 offset offset
21 1.987600 4.448160 2.240000 0.506347 0.991738 8574.668528 2.257432 0.05 offset linear
22 1.000000 0.006480 3137.640000 1.046749 0.898877 7642.916275 3.068481 0.05 offset linear
23 1.000000 0.001640 6790.720000 1.039013 0.968282 4923.631377 1.241606 0.05 offset linear
24 1.000000 0.002120 7569.400000 1.013627 0.944392 6359.625621 2.003590 0.05 offset linear
25 1.000000 0.000000 4494.423077 0.852759 0.999324 -2074.851065 0.119835 0.05 linear none
26 1.000000 0.001269 2928.384615 0.797172 0.999831 -3820.493969 0.067288 0.05 linear none
27 1.000000 0.000000 3241.000000 0.875605 0.999906 -4628.939357 0.051409 0.05 linear none
28 1.000000 0.000000 4285.423077 0.906409 0.999925 -4755.342130 0.049321 0.05 linear none
29 1.004231 0.001154 1154.423077 1.021687 0.999956 -4685.902722 0.050323 0.05 linear offset
30 1.002308 0.005462 724.846154 1.009361 0.999962 -4727.638709 0.049624 0.05 linear offset
31 1.018846 0.000846 386.769231 1.060105 0.999941 -4663.728160 0.050703 0.05 linear offset
32 1.001154 0.000038 1288.884615 1.066758 0.999895 -4770.508529 0.048949 0.05 linear offset
33 1.410769 0.002308 99.730769 1.287732 0.999980 -4740.269105 0.049618 0.05 linear linear
34 1.003077 0.000000 1507.884615 0.952294 0.999971 -4609.426586 0.051633 0.05 linear linear
35 1.000000 0.000000 1789.846154 0.993766 0.999950 -4735.628990 0.049512 0.05 linear linear
36 1.000000 0.000000 1587.846154 0.935730 0.999968 -4809.572121 0.048310 0.05 linear linear
In [20]:
if conf['model_mode'] == 'train':
    # Persist the summary so a later session can rerun with model_mode='eval'
    # and reload it instead of re-sampling.
    csv_path = file_basename + '.csv'
    df.to_csv(csv_path, index=False)
Plot posterior

In [21]:
# Posterior-predictive overview for all runs, without peak markers;
# figures are also written to disk (savefig='yes').
fig.plot_posterior(
    x_val, ldata, traces, ppc,
    dims=(int(truns / 2), 2),
    figure_size=(12, int(truns * (1.8))),
    savefig='yes',
    fname=file_basename,
    showpeaks='no',
    sets=tsets,
    labels=lmodbase,
    scenario='baseline',
)
In [22]:
# Same posterior-predictive overview as above, this time with the true
# peak positions overlaid (showpeaks='yes').
fig.plot_posterior(
    x_val, ldata, traces, ppc,
    dims=(int(truns / 2), 2),
    figure_size=(12, int(truns * (1.8))),
    savefig='yes',
    fname=file_basename,
    showpeaks='yes',
    sets=tsets,
    labels=lmodbase,
    scenario='baseline',
)
In [23]:
cnf.close(out_path)